In [ ]:
import pandas as pd 
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
# Apply seaborn's "darkgrid" theme (grey plot background) to the figures drawn below.
sns.set_theme(style="darkgrid")
In [ ]:
# Load the per-turn metrics and keep only the rows recorded for the person.
# The cell is written as one non-mutating chain: dropping in place from, or
# assigning a column to, a boolean-filtered slice can raise pandas'
# SettingWithCopyWarning, so no intermediate frame is modified here.
df = (
    pd.read_csv("all_turns_2.csv")
    .loc[lambda turns: turns['person_robot'] == 'person']
    # 'Unnamed: 0' is not used by the analysis below.
    .drop(columns=['Unnamed: 0'])
    # 0.2 scales the index span into a duration
    # (presumably the time per index step -- TODO confirm the unit).
    .assign(
        turn_duration=lambda turns: 0.2 * (
            turns['end_idx'].astype('float') - turns['start_idx'].astype('float')
        )
    )
)
# Summary statistics, transposed so that each column of df becomes one row.
df.describe().T
Out[ ]:
count mean std min 25% 50% 75% max
participant_id 103.0 1825.067961 633.333838 407.000000 2101.500000 2104.000000 2107.000000 2111.000000
path_num 103.0 2.233010 0.730349 1.000000 2.000000 2.000000 3.000000 3.000000
turn_num 103.0 2.640777 1.708424 1.000000 1.000000 2.000000 4.000000 9.000000
start_idx 103.0 551.747573 438.237596 50.000000 255.000000 456.000000 674.000000 2199.000000
end_idx 103.0 594.553398 437.257983 86.000000 298.000000 482.000000 713.500000 2225.000000
walking_direction_lag 103.0 -3.155340 25.493081 -151.000000 -10.000000 -1.000000 4.000000 107.000000
walking_direction_base_corr 103.0 0.085065 0.434108 -0.857578 -0.315911 0.098156 0.443063 0.941918
walking_direction_lagged_corr 103.0 0.468114 0.174327 0.122957 0.342861 0.439514 0.584931 0.948226
walking_direction_dtw 103.0 37.905734 24.331792 4.239983 22.831765 31.335852 44.304690 151.617539
speeds_lag 103.0 -0.485437 13.936033 -48.000000 -7.000000 -1.000000 3.000000 62.000000
speeds_base_corr 103.0 0.219284 0.364609 -0.797566 -0.053144 0.222804 0.507383 0.881916
speeds_lagged_corr 103.0 0.514786 0.165729 0.171078 0.380823 0.485911 0.670622 0.881916
speeds_dtw 103.0 29.832122 15.988083 10.358754 21.324705 24.668994 34.570697 103.965381
mean_distance 103.0 2.384638 1.791928 0.433665 1.362685 2.025793 2.836760 13.639054
mean_speed_difference 103.0 0.336295 0.145443 0.094192 0.237638 0.298077 0.398279 0.922073
mean_walking_direction_difference 103.0 63.593606 20.966348 14.479058 48.625379 66.074569 80.126460 100.850062
mean_pace_asymmetry 103.0 0.430152 0.139142 0.110604 0.347419 0.421565 0.504862 0.876306
turn_duration 103.0 8.561165 5.561919 5.000000 5.400000 6.400000 9.400000 33.600000
In [ ]:
# Distribution of the turn durations as a single box plot along the x axis.
duration_ax = sns.boxplot(x=df['turn_duration'])
duration_ax.set_title('Box plot of turn duration')
plt.show()
No description has been provided for this image
In [ ]:
# Divide each DTW distance by turn_duration / 0.2 so that the value no longer
# grows simply because a turn spans more indices.
idx_span = df['turn_duration'] / 0.2
df['normalized_walking_direction_dtw'] = df['walking_direction_dtw'] / idx_span
df['normalized_speeds_dtw'] = df['speeds_dtw'] / idx_span
In [ ]:
# Magnitude of each lag, ignoring its sign.
for lag_col in ('walking_direction_lag', 'speeds_lag'):
    df[f'abs_{lag_col}'] = df[lag_col].abs()
In [ ]:
# Columns used by every correlation matrix and per-feature plot below.
# The order of the list is the order of the panels in the plot grids.
# Entries that are commented out are excluded from the analysis.
relevant_features = [
    # general turn descriptors
    "turn_duration",
    "mean_distance",
    "mean_pace_asymmetry",
    # walking-direction metrics
    "walking_direction_lag",
    "abs_walking_direction_lag",
    "walking_direction_dtw",
    "normalized_walking_direction_dtw",
    # "walking_direction_base_corr",
    "walking_direction_lagged_corr",
    # "mean_walking_direction_difference",
    # speed metrics
    "speeds_lag",
    "abs_speeds_lag",
    "speeds_dtw",
    "normalized_speeds_dtw",
    # "speeds_base_corr",
    "speeds_lagged_corr",
    # "mean_speed_difference",
]
In [ ]:
# Pearson correlation between every pair of selected features. Cells whose
# absolute correlation is below 0.3 are masked out of the heatmap.
corr = df[relevant_features].corr(method='pearson', numeric_only=True)
mask = corr.abs() < 0.3
plt.figure(figsize=(12, 10))  # large canvas so the annotations stay readable
original_ax = sns.heatmap(corr, annot=True, fmt=".2f", mask=mask)
original_ax.set_title(f"Metrics Correlation Matrix - Original Data (n={len(df)})")
plt.show()
No description has been provided for this image
In [ ]:
# Flag every turn whose index range overlaps another turn of the same
# participant on the same path: a row is marked when more than one row of
# that group has its start_idx, or its end_idx, inside the row's own
# [start_idx, end_idx] range (bounds included).
df['overlapping'] = False
for row_label, turn in df.iterrows():
    same_group = (df['participant_id'] == turn['participant_id']) & (df['path_num'] == turn['path_num'])
    starts_inside = df['start_idx'].between(turn['start_idx'], turn['end_idx'])
    ends_inside = df['end_idx'].between(turn['start_idx'], turn['end_idx'])
    # The turn itself satisfies both tests whenever start_idx <= end_idx,
    # hence the comparison with 1 rather than 0.
    if (same_group & starts_inside).sum() > 1 or (same_group & ends_inside).sum() > 1:
        df.at[row_label, 'overlapping'] = True

# NOTE: the 'overlapping' flag is not applied as a filter (the line below is
# commented out), so the matrix drawn here covers every row of df.
# overlapping_and_not_subset = df[df['overlapping'] == True]
overlapping_and_not_subset = df
oans = overlapping_and_not_subset

corr_oans = oans[relevant_features].corr(method='pearson', numeric_only=True)
mask = corr_oans.abs() < 0.3
plt.figure(figsize=(12, 10))
oans_ax = sns.heatmap(corr_oans, annot=True, fmt=".2f", mask=mask)
oans_ax.set_title(f"Metrics Correlation Matrix - Overlapping Data (n={len(oans)})")
plt.show()
No description has been provided for this image
In [ ]:
# Keep only the turns whose lagged correlation exceeds the threshold for both
# walking direction and speed, then redraw the correlation heatmap on them.
threshold = 0.3
passes_direction = df['walking_direction_lagged_corr'] > threshold
passes_speed = df['speeds_lagged_corr'] > threshold
filtered_df = df[passes_direction & passes_speed]
# filtered_oans = filtered_df[filtered_df['overlapping'] == True]
filtered_oans = filtered_df

corr_filtered_oans = filtered_oans[relevant_features].corr(method='pearson', numeric_only=True)
mask = corr_filtered_oans.abs() < 0.3
plt.figure(figsize=(12, 10))
filtered_ax = sns.heatmap(corr_filtered_oans, annot=True, fmt=".2f", mask=mask)
filtered_ax.set_title(f"Metrics Correlation Matrix - Filtered Overlapping Data (n={len(filtered_oans)})")
plt.show()
No description has been provided for this image
In [ ]:
# Summary statistics of the filtered turns, one row per column of the frame.
filtered_oans.describe().transpose()
Out[ ]:
count mean std min 25% 50% 75% max
participant_id 80.0 1787.050000 666.917107 407.000000 2101.000000 2103.500000 2107.000000 2111.000000
path_num 80.0 2.237500 0.697985 1.000000 2.000000 2.000000 3.000000 3.000000
turn_num 80.0 2.612500 1.789518 1.000000 1.000000 2.000000 4.000000 9.000000
start_idx 80.0 557.275000 454.371648 54.000000 256.000000 456.000000 668.000000 2199.000000
end_idx 80.0 595.600000 453.169670 108.000000 300.000000 482.500000 698.250000 2225.000000
walking_direction_lag 80.0 -1.662500 12.969237 -41.000000 -8.250000 -1.000000 4.000000 47.000000
walking_direction_base_corr 80.0 0.115905 0.467918 -0.857578 -0.328202 0.212734 0.511793 0.941918
walking_direction_lagged_corr 80.0 0.517046 0.159682 0.315465 0.377550 0.467425 0.615642 0.948226
walking_direction_dtw 80.0 33.305556 18.261076 4.239983 21.018501 27.573042 42.750365 107.642701
speeds_lag 80.0 -2.175000 9.865905 -30.000000 -7.000000 -1.000000 0.000000 23.000000
speeds_base_corr 80.0 0.245733 0.373104 -0.797566 -0.031807 0.288961 0.542515 0.881916
speeds_lagged_corr 80.0 0.538861 0.148699 0.309378 0.428816 0.512229 0.679896 0.881916
speeds_dtw 80.0 26.937057 12.986413 10.358754 19.770054 23.836152 31.466053 90.532321
mean_distance 80.0 2.223850 1.635995 0.433665 1.364710 1.950531 2.689676 13.639054
mean_speed_difference 80.0 0.334028 0.133022 0.094192 0.246300 0.296413 0.405460 0.922073
mean_walking_direction_difference 80.0 62.887446 21.507993 14.479058 47.266777 64.734566 78.912842 100.850062
mean_pace_asymmetry 80.0 0.432057 0.128814 0.122233 0.353634 0.420388 0.509542 0.870018
turn_duration 80.0 7.665000 4.173063 5.000000 5.200000 5.800000 8.200000 33.400000
normalized_walking_direction_dtw 80.0 0.908792 0.384533 0.146206 0.632673 0.802530 1.103255 2.031908
normalized_speeds_dtw 80.0 0.734038 0.216814 0.256842 0.587907 0.732877 0.855633 1.422129
abs_walking_direction_lag 80.0 8.937500 9.492627 0.000000 1.750000 6.000000 13.000000 47.000000
abs_speeds_lag 80.0 6.925000 7.318738 0.000000 1.000000 5.000000 10.000000 30.000000
In [ ]:
from scipy.stats import pearsonr

# For every feature, draw one scatter + regression panel per other feature
# whose absolute Pearson correlation with it (on the filtered data) is above
# 0.3. Panels are laid out three per row.
for feature in relevant_features:
    # Skip pairs where one name is a prefix or suffix of the other (this also
    # skips the feature itself), then keep only the sufficiently correlated ones.
    to_display = [
        feature2
        for feature2 in relevant_features
        if not feature.startswith(feature2)
        and not feature2.startswith(feature)
        and not feature.endswith(feature2)
        and not feature2.endswith(feature)
        and np.abs(corr_filtered_oans.loc[feature, feature2]) > 0.3
    ]
    if len(to_display) == 0:
        continue

    n_cols = min(len(to_display), 3)
    n_rows = int(np.ceil(len(to_display) / 3))
    # squeeze=False always returns a 2-D array of axes, whatever the grid size.
    fig, axs = plt.subplots(n_rows, n_cols, figsize=(5 * n_cols, 5 * n_rows), squeeze=False)

    for i, feature2 in enumerate(to_display):
        # Row-major placement: floor division gives rows 0, 0, 0, 1, 1, 1, ...
        # (int(np.ceil(i/3))-1 would send i == 0 to row -1, i.e. the last row,
        # and shift every later first-column panel up by one row.)
        ax = axs[i // 3, i % 3]
        peares = pearsonr(filtered_oans[feature], filtered_oans[feature2], alternative='two-sided')
        pcorr, p_val = peares.statistic, peares.pvalue
        CI = peares.confidence_interval(confidence_level=0.95)
        # Scatter plot
        sns.scatterplot(x=feature, y=feature2, data=filtered_oans, ax=ax)
        # Regression line
        sns.regplot(x=feature, y=feature2, data=filtered_oans, scatter=False, line_kws={'color': 'red'}, ax=ax)
        ax.set_title(f"compared with {feature2}\ncorr: {round(corr_filtered_oans.loc[feature, feature2], 3)}, p_val: {round(p_val,5)}, CI: {[round(c,3) for c in CI]}", fontweight='bold')

    # Hide the grid cells of the last row that received no panel.
    for unused_ax in axs.ravel()[len(to_display):]:
        unused_ax.set_visible(False)

    # add title "feature vs correlated features" to the plot
    fig.suptitle(f"{feature}'s correlations", fontweight='bold')
    plt.tight_layout()
    plt.show()
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
In [ ]:
from PIL import Image
import seaborn as sns
from scipy import stats


def _show_turn_images(res, feature, label):
    """Show the four saved plots of one turn side by side.

    Parameters
    ----------
    res : pandas.DataFrame
        Single-row frame holding 'participant_id', 'person_robot',
        'path_num', 'turn_num' and the feature column of the turn to show.
    feature : str
        Name of the feature the turn was selected for (used in the title).
    label : str
        Which extreme the turn represents, e.g. "highest" or "lowest".
    """
    base_path = f"./turns/{res['participant_id'].values[0]}/{res['person_robot'].values[0]}/run_{res['path_num'].values[0]}/turn_{res['turn_num'].values[0]}/"
    image_names = ["paths.png", "distance.png", "walking_directions.png", "speeds.png"]
    fig, axs = plt.subplots(1, 4, figsize=(20, 5))
    for ax, image_name in zip(axs, image_names):
        # The context manager closes the image file once it has been handed
        # to imshow, instead of leaving one open handle per displayed image.
        with Image.open(base_path + image_name) as img:
            ax.imshow(img)
        ax.axis('off')
    # res has a single row, so each column contributes exactly one value.
    to_print_dict = {
        col: round(value, 3) if isinstance(value, float) else value
        for col, by_index in res.to_dict().items()
        for value in by_index.values()
    }
    to_print_str = ", ".join([f"{k}: {v}" for k, v in to_print_dict.items()])
    fig.suptitle(f"{feature} - {label} value\n {to_print_str}", fontweight='bold')
    plt.tight_layout()
    plt.show()


# for each feature, find highest and lowest valued row and display them
for feature in relevant_features:
    id_columns = ['participant_id', 'person_robot', 'path_num', 'turn_num', feature]
    h_res = filtered_oans.loc[filtered_oans[feature].nlargest(1).index, id_columns]
    _show_turn_images(h_res, feature, "highest")
    l_res = filtered_oans.loc[filtered_oans[feature].nsmallest(1).index, id_columns]
    _show_turn_images(l_res, feature, "lowest")
    print("\n\n")
No description has been provided for this image
No description has been provided for this image


No description has been provided for this image
No description has been provided for this image


No description has been provided for this image
No description has been provided for this image


No description has been provided for this image
No description has been provided for this image


No description has been provided for this image
No description has been provided for this image


No description has been provided for this image
No description has been provided for this image


No description has been provided for this image
No description has been provided for this image


No description has been provided for this image
No description has been provided for this image


No description has been provided for this image
No description has been provided for this image


No description has been provided for this image
No description has been provided for this image


No description has been provided for this image
No description has been provided for this image


No description has been provided for this image
No description has been provided for this image


No description has been provided for this image
No description has been provided for this image


In [ ]:
# Histogram (with KDE overlay) of every selected feature on the filtered
# data, three panels per row.
n_cols = min(len(relevant_features), 3)
n_rows = int(np.ceil(len(relevant_features) / 3))
# squeeze=False always returns a 2-D array of axes, whatever the grid size.
fig, axs = plt.subplots(n_rows, n_cols, figsize=(15, 5 * n_rows), squeeze=False)

for i, feature in enumerate(relevant_features):
    # Row-major placement: floor division gives rows 0, 0, 0, 1, 1, 1, ...
    # (int(np.ceil(i/3))-1 would put the first feature in the last row and
    # shift every later first-column panel up by one row.)
    ax = axs[i // 3, i % 3]
    sns.histplot(data=filtered_oans, x=feature, kde=True, ax=ax)
    ax.set_title(feature)
    ax.set_xlabel('')
    ax.set_ylabel('')

# Hide the grid cells of the last row that received no panel.
for unused_ax in axs.ravel()[len(relevant_features):]:
    unused_ax.set_visible(False)

plt.tight_layout()
plt.show()
No description has been provided for this image
In [ ]:
import scipy.stats as stats

# Normal probability (Q-Q) plot of every selected feature on the filtered
# data, three panels per row.
n_cols = min(len(relevant_features), 3)
n_rows = int(np.ceil(len(relevant_features) / 3))
# squeeze=False always returns a 2-D array of axes, whatever the grid size.
fig, axs = plt.subplots(n_rows, n_cols, figsize=(15, 5 * n_rows), squeeze=False)

for i, feature in enumerate(relevant_features):
    # Row-major placement: floor division gives rows 0, 0, 0, 1, 1, 1, ...
    # (int(np.ceil(i/3))-1 would put the first feature in the last row and
    # shift every later first-column panel up by one row.)
    ax = axs[i // 3, i % 3]
    stats.probplot(filtered_oans[feature], dist="norm", plot=ax)
    # Set after probplot so these labels are the ones left on the panel.
    ax.set_title(feature)
    ax.set_xlabel('Theoretical Quantiles')
    ax.set_ylabel('Ordered Values')

# Hide the grid cells of the last row that received no panel.
for unused_ax in axs.ravel()[len(relevant_features):]:
    unused_ax.set_visible(False)

plt.tight_layout()
plt.show()
No description has been provided for this image
In [ ]:
# Box plot of every selected feature on the filtered data, three panels per row.
n_cols = min(len(relevant_features), 3)
n_rows = int(np.ceil(len(relevant_features) / 3))
# squeeze=False always returns a 2-D array of axes, whatever the grid size.
fig, axs = plt.subplots(n_rows, n_cols, figsize=(15, 5 * n_rows), squeeze=False)

for i, feature in enumerate(relevant_features):
    # Row-major placement: floor division gives rows 0, 0, 0, 1, 1, 1, ...
    # (int(np.ceil(i/3))-1 would put the first feature in the last row and
    # shift every later first-column panel up by one row.)
    ax = axs[i // 3, i % 3]
    sns.boxplot(data=filtered_oans, y=feature, ax=ax)
    ax.set_title(feature)
    ax.set_xlabel('')
    ax.set_ylabel('')

# Hide the grid cells of the last row that received no panel.
for unused_ax in axs.ravel()[len(relevant_features):]:
    unused_ax.set_visible(False)

plt.tight_layout()
plt.show()
No description has been provided for this image